##### SET-UP #####

### load libraries
library(biglm); library(reshape2); library(data.table); library(lme4)
library(arm); library(ggplot2)

### set path
setwd("/path/to/this/replication/folder")

### set seed
set.seed(2365256)

### set lmer control
my_control <- lmerControl(optimizer = "nloptwrap", calc.derivs = FALSE,
                          check.rankX = c("silent.drop.cols"), 
                          optCtrl = list(maxfun = 25000))

### read in main data
df.final <- read.csv("final-many-to-many.csv", stringsAsFactors = F)

##### Table A1: Mass-elite congruence by affluence (Figure 1, main text) #####

# See the code in the main script (for Figure 1)

##### Table A2: Mass-elite congruence by affluence: 25% of country-years #####
### where preferences of the most and least affluent are least similar

### prepare data
df <- df.final[which(df.final$nobs_elite > 30),]
df$diff <- abs(df$mmean_poor - df$mmean_rich)
df <- df[which(df$diff > unname(summary(df$diff)[5])),]
df$cy <- paste(df$country, df$year, sep = " ")
cy <- sort(unique(df$cy))

### Model 1: MELR
dyads <- fread("final-dyads.csv", showProgress = T, stringsAsFactors = T)
# restrict dyads to cases we want
dyads$cy <- paste(as.character(dyads$m_country), as.character(dyads$m_year), 
                  sep = " ")
tlist <- split(dyads, f = dyads$cy)
pb <- txtProgressBar(min = 0, max = length(tlist), style = 3)
for(i in length(tlist):1){
  if(unique(tlist[[i]]$cy %in% cy)){
    next
  } else {
    tlist[[i]] <- NULL
  }
  setTxtProgressBar(pb, i)
}
dyads <- do.call(rbind, tlist)
# change reference category to the richest, multiply through the weights
dyads$class <- relevel(as.factor(dyads$class), ref = "4")
dyads$comb_weight <- dyads$m_w*dyads$e_w
# estimate model
mod1 <- biglm(emd ~ class + as.factor(m_ideology_scale_orig) + 
                   as.factor(e_ideology_scale_orig) + as.factor(scalediff), 
                 data = dyads, weights =~ dyads$comb_weight)
summary(mod1)

### Model 2: Bootstrapping
n_samp <- 250
samp_size <- 50000
est <- data.frame(matrix(ncol = 5, nrow = n_samp))
pb <- txtProgressBar(min = 0, max = n_samp, style = 3)
for(i in 1:n_samp){
  
  # sample data efficiently
  dat <- dyads[sample(.N, samp_size)]
  
  # run the regression
  mod <- suppressWarnings(
    lmer(emd ~ as.factor(class) + as.factor(m_ideology_scale_orig) + 
           as.factor(e_ideology_scale_orig) + as.factor(scalediff) +
           (1 | m_mass_id) + (1 | e_elite_id), 
         weights = comb_weight, data = dat, 
         control = my_control))
  
  # save the output
  est[i,] <- unname(fixef(mod)[c(1:5)])
  
  # keep track
  setTxtProgressBar(pb, i)
}
# estimates
apply(est[,-1], 2, mean)
sqrt(apply(est[,-1], 2, var))
apply(est[,-1], 2, quantile, c(.025, .975))

### Model 3: see the code in the main script for EMD estimates

### clean up
rm(list=ls()[!(ls() %in% c("df.final", "my_control"))])

##### Table A3: Mass-elite congruence by affluence: 25% of country-years #####
### where preferences of the middle class and most affluent are least similar

### prepare data
df <- df.final[which(df.final$nobs_elite > 30),]
df$diff <- abs(df$mmean_mid - df$mmean_rich)
df <- df[which(df$diff > unname(summary(df$diff)[5])),]
df$cy <- paste(df$country, df$year, sep = " ")
cy <- sort(unique(df$cy))

### Model 1: MELR
dyads <- fread("final-dyads.csv", showProgress = T, stringsAsFactors = T)
# restrict dyads to cases we want
dyads$cy <- paste(as.character(dyads$m_country), as.character(dyads$m_year), 
                  sep = " ")
tlist <- split(dyads, f = dyads$cy)
pb <- txtProgressBar(min = 0, max = length(tlist), style = 3)
for(i in length(tlist):1){
  if(unique(tlist[[i]]$cy %in% cy)){
    next
  } else {
    tlist[[i]] <- NULL
  }
  setTxtProgressBar(pb, i)
}
dyads <- do.call(rbind, tlist)
# change reference category to the richest, multiply through the weights
dyads$class <- relevel(as.factor(dyads$class), ref = "4")
dyads$comb_weight <- dyads$m_w*dyads$e_w
# estimate model
mod1 <- biglm(emd ~ class + as.factor(m_ideology_scale_orig) + 
                as.factor(e_ideology_scale_orig) + as.factor(scalediff), 
              data = dyads, weights =~ dyads$comb_weight)
summary(mod1)

### Model 2: Bootstrapping
n_samp <- 250
samp_size <- 50000
est <- data.frame(matrix(ncol = 5, nrow = n_samp))
pb <- txtProgressBar(min = 0, max = n_samp, style = 3)
for(i in 1:n_samp){
  
  # sample data efficiently
  dat <- dyads[sample(.N, samp_size)]
  
  # run the regression
  mod <- suppressWarnings(
    lmer(emd ~ as.factor(class) + as.factor(m_ideology_scale_orig) + 
           as.factor(e_ideology_scale_orig) + as.factor(scalediff) +
           (1 | m_mass_id) + (1 | e_elite_id), 
         weights = comb_weight, data = dat, 
         control = my_control))
  
  # save the output
  est[i,] <- unname(fixef(mod)[c(1:5)])
  
  # keep track
  setTxtProgressBar(pb, i)
}
# estimates
apply(est[,-1], 2, mean)
sqrt(apply(est[,-1], 2, var))
apply(est[,-1], 2, quantile, c(.025, .975))

### Model 3: EMD
df <- df[,c("country", "year", "emd_lessaffluent", "emd_midloaffluent",
            "emd_midaffluent", "emd_midhiaffluent", "emd_moreaffluent", 
            "escale", "mscale")]
df <- reshape2::melt(df, id.vars = c("country", "year", "escale", "mscale"), 
                     variable.name = "emd")
df$class <- rep(NA, nrow(df))
df$class[which(df$emd == "emd_lessaffluent")] <- "poor"
df$class[which(df$emd == "emd_midloaffluent")] <- "middle low"
df$class[which(df$emd == "emd_midaffluent")] <- "middle"
df$class[which(df$emd == "emd_midhiaffluent")] <- "middle high"
df$class[which(df$emd == "emd_moreaffluent")] <- "rich"
df$emd <- NULL
names(df)[which(names(df) == "value")] <- "emd"
# create the "do question scales differ" variable
df$scalediff <- rep(NA, nrow(df))
for(i in 1:nrow(df)){
  if(as.character(df$escale[i]) == as.character(df$mscale[i])){
    df$scalediff[i] <- 1
  } else {
    df$scalediff[i] <- 0
  }
}
# refactor class
df$class <- factor(df$class, levels = c("rich", "poor", "middle low",
                                        "middle", "middle high")) 
# estimate
mod3 <- glm(emd ~ as.factor(class) + as.factor(country) + as.factor(year) +
              as.factor(mscale) + as.factor(escale) + as.factor(scalediff), 
            data = df)
summary(mod3)
nobs(mod3)

### clean up
rm(list=ls()[!(ls() %in% c("df.final"))])

##### Table A3: Mass-elite congruence by affluence and issue in Latin #####
### America (Figure 3, main text)

### See the code in the main script (for Figure 3)

##### Table A4: Mass-elite congruence by affluence and issue in Sweden #####
### (Figure 4, main text)

# See the code in the main script (for Figure 4)

##### Table A5: Mass-elite congruence by affluence and issue in Africa #####
### (Figure 5, main text)

# See the code in the main script (for Figure 5)

##### Table A6: Mass-elite congruence by affluence: alternatives to post- #####
### stratifying

### Model 1: dropping elites we can't stratify on PID and gender
df <- df.final[which(df.final$nobs_elite_weighted_d > 30),]
df <- df[,c("country", "year", "emd_lessaffluent_weighted_d", 
            "emd_midloaffluent_weighted_d", "emd_midaffluent_weighted_d", 
            "emd_midhiaffluent_weighted_d", "emd_moreaffluent_weighted_d", 
            "escale", "mscale")]
df <- reshape2::melt(df, id.vars = c("country", "year", "escale", "mscale"), 
                     variable.name = "emd")
df$class <- rep(NA,nrow(df))
df$class[which(df$emd == "emd_lessaffluent_weighted_d")] <- "poor"
df$class[which(df$emd == "emd_midloaffluent_weighted_d")] <- "middle low"
df$class[which(df$emd == "emd_midaffluent_weighted_d")] <- "middle"
df$class[which(df$emd == "emd_midhiaffluent_weighted_d")] <- "middle high"
df$class[which(df$emd == "emd_moreaffluent_weighted_d")] <- "rich"
df$emd <- NULL
names(df)[which(names(df) == "value")] <- "emd"
df$scalediff <- rep(NA,nrow(df))
for(i in 1:nrow(df)){
  if(as.character(df$escale[i]) == as.character(df$mscale[i])){
    df$scalediff[i] <- 1
  } else {
    df$scalediff[i] <- 0
  }
}
df$class <- factor(df$class, levels = c("rich", "poor", "middle low",
                                        "middle", "middle high")) 
mod1 <- glm(emd ~ as.factor(class) + as.factor(country) + as.factor(year) +
              as.factor(mscale) + as.factor(escale) + as.factor(scalediff), 
            data = df)
summary(mod1)

### Model 2: unweighted (no stratifying)
df <- df.final[which(df.final$nobs_elite_unweighted > 30),]
df <- df[,c("country", "year", "emd_lessaffluent_unweighted", 
            "emd_midloaffluent_unweighted", "emd_midaffluent_unweighted", 
            "emd_midhiaffluent_unweighted", "emd_moreaffluent_unweighted", 
            "escale", "mscale")]
df <- reshape2::melt(df, id.vars = c("country", "year", "escale", "mscale"), 
                     variable.name = "emd")
df$class <- rep(NA,nrow(df))
df$class[which(df$emd == "emd_lessaffluent_unweighted")] <- "poor"
df$class[which(df$emd == "emd_midloaffluent_unweighted")] <- "middle low"
df$class[which(df$emd == "emd_midaffluent_unweighted")] <- "middle"
df$class[which(df$emd == "emd_midhiaffluent_unweighted")] <- "middle high"
df$class[which(df$emd == "emd_moreaffluent_unweighted")] <- "rich"
df$emd <- NULL
names(df)[which(names(df) == "value")] <- "emd"
df$scalediff <- rep(NA,nrow(df))
for(i in 1:nrow(df)){
  if(is.na(df$mscale[i]) | is.na(df$escale[i])){
    df$scalediff[i] <- 1
    next
  }
  if(as.character(df$escale[i]) == as.character(df$mscale[i])){
    df$scalediff[i] <- 1
  } else {
    df$scalediff[i] <- 0
  }
}
df$class <- factor(df$class, levels = c("rich", "poor", "middle low",
                                        "middle", "middle high")) 
mod2 <- glm(emd ~ as.factor(class) + as.factor(country) + as.factor(year) +
              as.factor(mscale) + as.factor(escale) + as.factor(scalediff), 
            data = df)
summary(mod2)

### Model 3: 80% response rate or higher
df <- df.final[which(df.final$rr_80 == "1"),]
df <- df[,c("country", "year", "emd_lessaffluent_unweighted", 
            "emd_midloaffluent_unweighted", "emd_midaffluent_unweighted", 
            "emd_midhiaffluent_unweighted", "emd_moreaffluent_unweighted", 
            "escale", "mscale")]
df <- reshape2::melt(df, id.vars = c("country", "year", "escale", "mscale"), 
                     variable.name = "emd")
df$class <- rep(NA,nrow(df))
df$class[which(df$emd == "emd_lessaffluent_unweighted")] <- "poor"
df$class[which(df$emd == "emd_midloaffluent_unweighted")] <- "middle low"
df$class[which(df$emd == "emd_midaffluent_unweighted")] <- "middle"
df$class[which(df$emd == "emd_midhiaffluent_unweighted")] <- "middle high"
df$class[which(df$emd == "emd_moreaffluent_unweighted")] <- "rich"
df$emd <- NULL
names(df)[which(names(df) == "value")] <- "emd"
df$scalediff <- rep(NA,nrow(df))
for(i in 1:nrow(df)){
  if(as.character(df$escale[i]) == as.character(df$mscale[i])){
    df$scalediff[i] <- 1
  } else {
    df$scalediff[i] <- 0
  }
}
df$class <- factor(df$class, levels = c("rich", "poor", "middle low",
                                        "middle", "middle high")) 
mod3 <- glm(emd ~ as.factor(class) + as.factor(country) + as.factor(year) +
              as.factor(mscale) + as.factor(scalediff), data = df)
summary(mod3)

### Model 4: 70% response rate or higher
df <- df.final[which(df.final$rr_70 == "1"),]
df <- df[,c("country", "year", "emd_lessaffluent_unweighted", 
            "emd_midloaffluent_unweighted", "emd_midaffluent_unweighted", 
            "emd_midhiaffluent_unweighted", "emd_moreaffluent_unweighted", 
            "escale", "mscale")]
df <- reshape2::melt(df, id.vars = c("country", "year", "escale", "mscale"), 
                     variable.name = "emd")
df$class <- rep(NA,nrow(df))
df$class[which(df$emd == "emd_lessaffluent_unweighted")] <- "poor"
df$class[which(df$emd == "emd_midloaffluent_unweighted")] <- "middle low"
df$class[which(df$emd == "emd_midaffluent_unweighted")] <- "middle"
df$class[which(df$emd == "emd_midhiaffluent_unweighted")] <- "middle high"
df$class[which(df$emd == "emd_moreaffluent_unweighted")] <- "rich"
df$emd <- NULL
names(df)[which(names(df) == "value")] <- "emd"
df$scalediff <- rep(NA,nrow(df))
for(i in 1:nrow(df)){
  if(as.character(df$escale[i]) == as.character(df$mscale[i])){
    df$scalediff[i] <- 1
  } else {
    df$scalediff[i] <- 0
  }
}
df$class <- factor(df$class, levels = c("rich", "poor", "middle low",
                                        "middle", "middle high")) 
mod4 <- glm(emd ~ as.factor(class) + as.factor(country) + as.factor(year) +
              as.factor(mscale) + as.factor(escale) + as.factor(scalediff), 
            data = df)
summary(mod4)

### Model 5: 60% response rate or higher
df <- df.final[which(df.final$rr_60 == "1"),]
df <- df[,c("country", "year", "emd_lessaffluent_unweighted", 
            "emd_midloaffluent_unweighted", "emd_midaffluent_unweighted", 
            "emd_midhiaffluent_unweighted", "emd_moreaffluent_unweighted", 
            "escale", "mscale")]
df <- reshape2::melt(df, id.vars = c("country", "year", "escale", "mscale"), 
                     variable.name = "emd")
df$class <- rep(NA,nrow(df))
df$class[which(df$emd == "emd_lessaffluent_unweighted")] <- "poor"
df$class[which(df$emd == "emd_midloaffluent_unweighted")] <- "middle low"
df$class[which(df$emd == "emd_midaffluent_unweighted")] <- "middle"
df$class[which(df$emd == "emd_midhiaffluent_unweighted")] <- "middle high"
df$class[which(df$emd == "emd_moreaffluent_unweighted")] <- "rich"
df$emd <- NULL
names(df)[which(names(df) == "value")] <- "emd"
df$scalediff <- rep(NA,nrow(df))
for(i in 1:nrow(df)){
  if(as.character(df$escale[i]) == as.character(df$mscale[i])){
    df$scalediff[i] <- 1
  } else {
    df$scalediff[i] <- 0
  }
}
df$class <- factor(df$class, levels = c("rich", "poor", "middle low",
                                        "middle", "middle high")) 
mod5 <- glm(emd ~ as.factor(class) + as.factor(country) + as.factor(year) +
              as.factor(mscale) + as.factor(escale) + as.factor(scalediff), 
            data = df)
summary(mod5)

### Model 6: 50% response rate or higher
df <- df.final[which(df.final$rr_50 == "1"),]
df <- df[,c("country", "year", "emd_lessaffluent_unweighted", 
            "emd_midloaffluent_unweighted", "emd_midaffluent_unweighted", 
            "emd_midhiaffluent_unweighted", "emd_moreaffluent_unweighted", 
            "escale", "mscale")]
df <- reshape2::melt(df, id.vars = c("country", "year", "escale", "mscale"), 
                     variable.name = "emd")
df$class <- rep(NA,nrow(df))
df$class[which(df$emd == "emd_lessaffluent_unweighted")] <- "poor"
df$class[which(df$emd == "emd_midloaffluent_unweighted")] <- "middle low"
df$class[which(df$emd == "emd_midaffluent_unweighted")] <- "middle"
df$class[which(df$emd == "emd_midhiaffluent_unweighted")] <- "middle high"
df$class[which(df$emd == "emd_moreaffluent_unweighted")] <- "rich"
df$emd <- NULL
names(df)[which(names(df) == "value")] <- "emd"
df$scalediff <- rep(NA,nrow(df))
for(i in 1:nrow(df)){
  if(as.character(df$escale[i]) == as.character(df$mscale[i])){
    df$scalediff[i] <- 1
  } else {
    df$scalediff[i] <- 0
  }
}
df$class <- factor(df$class, levels = c("rich", "poor", "middle low",
                                        "middle", "middle high")) 
mod6 <- glm(emd ~ as.factor(class) + as.factor(country) + as.factor(year) +
              as.factor(mscale) + as.factor(escale) + as.factor(scalediff), 
            data = df)
summary(mod6)

### Model 7: Less than 80% response rate
df <- df.final[which(df.final$rr_80 == "0"),]
df <- df[which(!is.na(df$emd_all)),]
df <- df[,c("country", "year", "emd_lessaffluent_unweighted", 
            "emd_midloaffluent_unweighted", "emd_midaffluent_unweighted", 
            "emd_midhiaffluent_unweighted", "emd_moreaffluent_unweighted", 
            "escale", "mscale")]
df <- reshape2::melt(df, id.vars = c("country", "year", "escale", "mscale"), 
                     variable.name = "emd")
df$class <- rep(NA,nrow(df))
df$class[which(df$emd == "emd_lessaffluent_unweighted")] <- "poor"
df$class[which(df$emd == "emd_midloaffluent_unweighted")] <- "middle low"
df$class[which(df$emd == "emd_midaffluent_unweighted")] <- "middle"
df$class[which(df$emd == "emd_midhiaffluent_unweighted")] <- "middle high"
df$class[which(df$emd == "emd_moreaffluent_unweighted")] <- "rich"
df$emd <- NULL
names(df)[which(names(df) == "value")] <- "emd"
df$scalediff <- rep(NA,nrow(df))
for(i in 1:nrow(df)){
  if(as.character(df$escale[i]) == as.character(df$mscale[i])){
    df$scalediff[i] <- 1
  } else {
    df$scalediff[i] <- 0
  }
}
df$class <- factor(df$class, levels = c("rich", "poor", "middle low",
                                        "middle", "middle high")) 
mod7 <- glm(emd ~ as.factor(class) + as.factor(country) + as.factor(year) +
              as.factor(mscale) + as.factor(escale) + as.factor(scalediff), 
            data = df)
summary(mod7)

### clean up
rm(list=ls()[!(ls() %in% c("df.final"))])

##### Table A7: Mass-elite congruence by affluence: Other alternative #####
### coding rules 

### Model 1: using all elite data, without dropping any dup samples
df <- df.final[which(df.final$nobs_elite_full > 30),]
df <- df[,c("country", "year", "emd_lessaffluent_full", 
            "emd_midloaffluent_full", "emd_midaffluent_full", 
            "emd_midhiaffluent_full", "emd_moreaffluent_full", 
            "escale_full", "mscale")]
df <- reshape2::melt(df, id.vars = c("country", "year", "escale_full", 
                                     "mscale"), variable.name = "emd")
df$class <- rep(NA,nrow(df))
df$class[which(df$emd == "emd_lessaffluent_full")] <- "poor"
df$class[which(df$emd == "emd_midloaffluent_full")] <- "middle low"
df$class[which(df$emd == "emd_midaffluent_full")] <- "middle"
df$class[which(df$emd == "emd_midhiaffluent_full")] <- "middle high"
df$class[which(df$emd == "emd_moreaffluent_full")] <- "rich"
df$emd <- NULL
names(df)[which(names(df) == "value")] <- "emd"
df$scalediff <- rep(NA,nrow(df))
for(i in 1:nrow(df)){
  if(as.character(df$escale_full[i]) == as.character(df$mscale[i])){
    df$scalediff[i] <- 1
  } else {
    df$scalediff[i] <- 0
  }
}
df$class <- factor(df$class, levels = c("rich", "poor", "middle low",
                                        "middle", "middle high")) 
mod1 <- glm(emd ~ as.factor(class) + as.factor(country) + as.factor(year) +
              as.factor(mscale) + as.factor(escale_full) + as.factor(scalediff), 
            data = df)
summary(mod1)

### Model 2: using all elite data, including MEPs
# note this is equivalent to dropping the restriction that N >= 30 in a 
# country-year
df <- df.final
df <- df[,c("country", "year", "emd_lessaffluent_full", 
            "emd_midloaffluent_full", "emd_midaffluent_full", 
            "emd_midhiaffluent_full", "emd_moreaffluent_full", 
            "escale_full", "mscale")]
df <- reshape2::melt(df, id.vars = c("country", "year", "escale_full", 
                                     "mscale"), variable.name = "emd")
df$class <- rep(NA,nrow(df))
df$class[which(df$emd == "emd_lessaffluent_full")] <- "poor"
df$class[which(df$emd == "emd_midloaffluent_full")] <- "middle low"
df$class[which(df$emd == "emd_midaffluent_full")] <- "middle"
df$class[which(df$emd == "emd_midhiaffluent_full")] <- "middle high"
df$class[which(df$emd == "emd_moreaffluent_full")] <- "rich"
df$emd <- NULL
names(df)[which(names(df) == "value")] <- "emd"
df$scalediff <- rep(NA,nrow(df))
for(i in 1:nrow(df)){
  if(as.character(df$escale_full[i]) == as.character(df$mscale[i])){
    df$scalediff[i] <- 1
  } else {
    df$scalediff[i] <- 0
  }
}
df$class <- factor(df$class, levels = c("rich", "poor", "middle low",
                                        "middle", "middle high")) 
mod2 <- glm(emd ~ as.factor(class) + as.factor(country) + as.factor(year) +
              as.factor(mscale) + as.factor(escale_full) + as.factor(scalediff), 
            data = df)
summary(mod2)

### Model 3: only mass respondents with wealth data
df <- df.final[which(df.final$nobs_elite > 30 & 
                       df.final$mclass_var == "wealth"),]
df <- df[,c("country", "year", "emd_lessaffluent", "emd_midloaffluent", 
            "emd_midaffluent", "emd_midhiaffluent", "emd_moreaffluent", 
            "escale", "mscale")]
df <- reshape2::melt(df, id.vars = c("country", "year", "escale", "mscale"), 
                     variable.name = "emd")
df$class <- rep(NA,nrow(df))
df$class[which(df$emd == "emd_lessaffluent")] <- "poor"
df$class[which(df$emd == "emd_midloaffluent")] <- "middle low"
df$class[which(df$emd == "emd_midaffluent")] <- "middle"
df$class[which(df$emd == "emd_midhiaffluent")] <- "middle high"
df$class[which(df$emd == "emd_moreaffluent")] <- "rich"
df$emd <- NULL
names(df)[which(names(df) == "value")] <- "emd"
df$scalediff <- rep(NA,nrow(df))
for(i in 1:nrow(df)){
  if(as.character(df$escale[i]) == as.character(df$mscale[i])){
    df$scalediff[i] <- 1
  } else {
    df$scalediff[i] <- 0
  }
}
df$class <- factor(df$class, levels = c("rich", "poor", "middle low",
                                        "middle", "middle high")) 
mod3 <- glm(emd ~ as.factor(class) + as.factor(country) + as.factor(year) +
              as.factor(mscale) + as.factor(escale) + as.factor(scalediff), 
            data = df)
summary(mod3)

### Model 4: including scale difference interactions with affluence quintiles
df <- df.final[which(df.final$nobs_elite > 30),]
df <- df[,c("country", "year", "emd_lessaffluent", "emd_midloaffluent", 
            "emd_midaffluent", "emd_midhiaffluent", "emd_moreaffluent", 
            "escale", "mscale")]
df <- reshape2::melt(df, id.vars = c("country", "year", "escale", "mscale"), 
                     variable.name = "emd")
df$class <- rep(NA,nrow(df))
df$class[which(df$emd == "emd_lessaffluent")] <- "poor"
df$class[which(df$emd == "emd_midloaffluent")] <- "middle low"
df$class[which(df$emd == "emd_midaffluent")] <- "middle"
df$class[which(df$emd == "emd_midhiaffluent")] <- "middle high"
df$class[which(df$emd == "emd_moreaffluent")] <- "rich"
df$emd <- NULL
names(df)[which(names(df) == "value")] <- "emd"
df$scalediff <- rep(NA,nrow(df))
for(i in 1:nrow(df)){
  if(as.character(df$escale[i]) == as.character(df$mscale[i])){
    df$scalediff[i] <- 1
  } else {
    df$scalediff[i] <- 0
  }
}
df$class <- factor(df$class, levels = c("rich", "poor", "middle low",
                                        "middle", "middle high")) 
mod4 <- glm(emd ~ as.factor(class)*as.factor(scalediff) + as.factor(country) + 
              as.factor(year) + as.factor(mscale) + as.factor(escale), 
            data = df)
summary(mod4)

### Model 5: N > 15
df <- df.final[which(df.final$nobs_elite > 15),]
df <- df[,c("country", "year", "emd_lessaffluent", "emd_midloaffluent", 
            "emd_midaffluent", "emd_midhiaffluent", "emd_moreaffluent", 
            "escale", "mscale")]
df <- reshape2::melt(df, id.vars = c("country", "year", "escale", "mscale"), 
                     variable.name = "emd")
df$class <- rep(NA,nrow(df))
df$class[which(df$emd == "emd_lessaffluent")] <- "poor"
df$class[which(df$emd == "emd_midloaffluent")] <- "middle low"
df$class[which(df$emd == "emd_midaffluent")] <- "middle"
df$class[which(df$emd == "emd_midhiaffluent")] <- "middle high"
df$class[which(df$emd == "emd_moreaffluent")] <- "rich"
df$emd <- NULL
names(df)[which(names(df) == "value")] <- "emd"
df$scalediff <- rep(NA,nrow(df))
for(i in 1:nrow(df)){
  if(as.character(df$escale[i]) == as.character(df$mscale[i])){
    df$scalediff[i] <- 1
  } else {
    df$scalediff[i] <- 0
  }
}
df$class <- factor(df$class, levels = c("rich", "poor", "middle low",
                                        "middle", "middle high")) 
mod5 <- glm(emd ~ as.factor(class) + as.factor(country) + as.factor(year) + 
              as.factor(mscale) + as.factor(escale) + as.factor(scalediff), 
            data = df)
summary(mod5)

### Model 6: N > 50
df <- df.final[which(df.final$nobs_elite > 50),]
df <- df[,c("country", "year", "emd_lessaffluent", "emd_midloaffluent", 
            "emd_midaffluent", "emd_midhiaffluent", "emd_moreaffluent", 
            "escale", "mscale")]
df <- reshape2::melt(df, id.vars = c("country", "year", "escale", "mscale"), 
                     variable.name = "emd")
df$class <- rep(NA,nrow(df))
df$class[which(df$emd == "emd_lessaffluent")] <- "poor"
df$class[which(df$emd == "emd_midloaffluent")] <- "middle low"
df$class[which(df$emd == "emd_midaffluent")] <- "middle"
df$class[which(df$emd == "emd_midhiaffluent")] <- "middle high"
df$class[which(df$emd == "emd_moreaffluent")] <- "rich"
df$emd <- NULL
names(df)[which(names(df) == "value")] <- "emd"
df$scalediff <- rep(NA,nrow(df))
for(i in 1:nrow(df)){
  if(as.character(df$escale[i]) == as.character(df$mscale[i])){
    df$scalediff[i] <- 1
  } else {
    df$scalediff[i] <- 0
  }
}
df$class <- factor(df$class, levels = c("rich", "poor", "middle low",
                                        "middle", "middle high")) 
mod6 <- glm(emd ~ as.factor(class) + as.factor(country) + as.factor(year) + 
              as.factor(mscale) + as.factor(escale) + as.factor(scalediff), 
            data = df)
summary(mod6)

### Model 7: N > 100
df <- df.final[which(df.final$nobs_elite > 100),]
df <- df[,c("country", "year", "emd_lessaffluent", "emd_midloaffluent", 
            "emd_midaffluent", "emd_midhiaffluent", "emd_moreaffluent", 
            "escale", "mscale")]
df <- reshape2::melt(df, id.vars = c("country", "year", "escale", "mscale"), 
                     variable.name = "emd")
df$class <- rep(NA,nrow(df))
df$class[which(df$emd == "emd_lessaffluent")] <- "poor"
df$class[which(df$emd == "emd_midloaffluent")] <- "middle low"
df$class[which(df$emd == "emd_midaffluent")] <- "middle"
df$class[which(df$emd == "emd_midhiaffluent")] <- "middle high"
df$class[which(df$emd == "emd_moreaffluent")] <- "rich"
df$emd <- NULL
names(df)[which(names(df) == "value")] <- "emd"
df$scalediff <- rep(NA,nrow(df))
for(i in 1:nrow(df)){
  if(as.character(df$escale[i]) == as.character(df$mscale[i])){
    df$scalediff[i] <- 1
  } else {
    df$scalediff[i] <- 0
  }
}
df$class <- factor(df$class, levels = c("rich", "poor", "middle low",
                                        "middle", "middle high")) 
mod7 <- glm(emd ~ as.factor(class) + as.factor(country) + as.factor(year) + 
              as.factor(mscale) + as.factor(escale) + as.factor(scalediff), 
            data = df)
summary(mod7)

### clean up
rm(list=ls()[!(ls() %in% c("df.final"))])

##### VERSION CONTROL #####
sessionInfo()
# R version 3.5.1 (2018-07-02)
# Platform: x86_64-apple-darwin15.6.0 (64-bit)
# Running under: macOS High Sierra 10.13.6
# 
# Matrix products: default
# BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
# LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
# 
# locale:
# [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
# 
# attached base packages:
# [1] stats     graphics  grDevices utils     datasets  methods   base     
# 
# other attached packages:
# [1] ggplot2_3.3.2     arm_1.11-1        MASS_7.3-51.6     lme4_1.1-23      
# [5] Matrix_1.2-18     data.table_1.12.8 reshape2_1.4.4    biglm_0.9-2      
# [9] DBI_1.1.0        
# 
# loaded via a namespace (and not attached):
# [1] Rcpp_1.0.5       compiler_3.5.1   pillar_1.4.4     nloptr_1.2.2.1  
# [5] plyr_1.8.6       tools_3.5.1      boot_1.3-25      statmod_1.4.34  
# [9] lifecycle_0.2.0  tibble_3.0.2     gtable_0.3.0     nlme_3.1-148    
# [13] lattice_0.20-41  pkgconfig_2.0.3  rlang_0.4.6      rstudioapi_0.11 
# [17] coda_0.19-3      withr_2.2.0      dplyr_1.0.0      stringr_1.4.0   
# [21] generics_0.0.2   vctrs_0.3.1      tidyselect_1.1.0 grid_3.5.1      
# [25] glue_1.4.1       R6_2.4.1         minqa_1.2.4      purrr_0.3.4     
# [29] magrittr_1.5     scales_1.1.1     ellipsis_0.3.1   splines_3.5.1   
# [33] abind_1.4-5      colorspace_1.4-1 stringi_1.4.6    munsell_0.5.0   
# [37] crayon_1.3.4    
